// Copyright (c) Meta Platforms, Inc. and affiliates.

#include <stddef.h>
#include <stdint.h>
#include <array>
#include <string>
#include <string_view>

#include <gtest/gtest.h>

#include "custom_parsers/zip_lexer.h"
#include "openzl/common/errors_internal.h"

using namespace ::testing;

namespace {
// Classic (non-zip64) ZIP archive used as the main fixture.
//
// Layout, as encoded in the bytes below:
//   - six local file headers (signature 50 4b 03 04), all with compression
//     method 0 (stored): "test/", "test/hello", "test/world", "test/lorem",
//     "test/dir/" and "test/dir/file";
//   - the central directory (six 50 4b 01 02 records) starting at offset
//     0x1c1 and spanning 0x1dd bytes;
//   - the end-of-central-directory record (50 4b 05 06) in the final 22 bytes,
//     reporting six entries and no archive comment.
//
// The fixture is read-only, hence constexpr.
constexpr std::array<uint8_t, 948> kTestZip = {
    0x50, 0x4b, 0x03, 0x04, 0x0a, 0x00, 0x00, 0x00, 0x00, 0x00, 0xb1, 0x54,
    0x55, 0x5a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x05, 0x00, 0x1c, 0x00, 0x74, 0x65, 0x73, 0x74, 0x2f, 0x55,
    0x54, 0x09, 0x00, 0x03, 0xed, 0xc7, 0xb8, 0x67, 0xee, 0xc7, 0xb8, 0x67,
    0x75, 0x78, 0x0b, 0x00, 0x01, 0x04, 0x9a, 0x7d, 0x00, 0x00, 0x04, 0x64,
    0x00, 0x00, 0x00, 0x50, 0x4b, 0x03, 0x04, 0x0a, 0x00, 0x00, 0x00, 0x00,
    0x00, 0xa6, 0x54, 0x55, 0x5a, 0x20, 0x30, 0x3a, 0x36, 0x06, 0x00, 0x00,
    0x00, 0x06, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x1c, 0x00, 0x74, 0x65, 0x73,
    0x74, 0x2f, 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x55, 0x54, 0x09, 0x00, 0x03,
    0xd7, 0xc7, 0xb8, 0x67, 0xd8, 0xc7, 0xb8, 0x67, 0x75, 0x78, 0x0b, 0x00,
    0x01, 0x04, 0x9a, 0x7d, 0x00, 0x00, 0x04, 0x64, 0x00, 0x00, 0x00, 0x68,
    0x65, 0x6c, 0x6c, 0x6f, 0x0a, 0x50, 0x4b, 0x03, 0x04, 0x0a, 0x00, 0x00,
    0x00, 0x00, 0x00, 0xa9, 0x54, 0x55, 0x5a, 0xa8, 0x61, 0x38, 0xdd, 0x06,
    0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x1c, 0x00, 0x74,
    0x65, 0x73, 0x74, 0x2f, 0x77, 0x6f, 0x72, 0x6c, 0x64, 0x55, 0x54, 0x09,
    0x00, 0x03, 0xdd, 0xc7, 0xb8, 0x67, 0xdd, 0xc7, 0xb8, 0x67, 0x75, 0x78,
    0x0b, 0x00, 0x01, 0x04, 0x9a, 0x7d, 0x00, 0x00, 0x04, 0x64, 0x00, 0x00,
    0x00, 0x77, 0x6f, 0x72, 0x6c, 0x64, 0x0a, 0x50, 0x4b, 0x03, 0x04, 0x0a,
    0x00, 0x00, 0x00, 0x00, 0x00, 0xae, 0x54, 0x55, 0x5a, 0x18, 0x6b, 0xe6,
    0x7d, 0x1b, 0x00, 0x00, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x1c,
    0x00, 0x74, 0x65, 0x73, 0x74, 0x2f, 0x6c, 0x6f, 0x72, 0x65, 0x6d, 0x55,
    0x54, 0x09, 0x00, 0x03, 0xe7, 0xc7, 0xb8, 0x67, 0xe8, 0xc7, 0xb8, 0x67,
    0x75, 0x78, 0x0b, 0x00, 0x01, 0x04, 0x9a, 0x7d, 0x00, 0x00, 0x04, 0x64,
    0x00, 0x00, 0x00, 0x6c, 0x6f, 0x72, 0x65, 0x6d, 0x20, 0x69, 0x70, 0x73,
    0x75, 0x6d, 0x20, 0x62, 0x6c, 0x61, 0x68, 0x20, 0x62, 0x6c, 0x61, 0x68,
    0x20, 0x62, 0x6c, 0x61, 0x68, 0x0a, 0x50, 0x4b, 0x03, 0x04, 0x0a, 0x00,
    0x00, 0x00, 0x00, 0x00, 0xb3, 0x54, 0x55, 0x5a, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, 0x00, 0x1c, 0x00,
    0x74, 0x65, 0x73, 0x74, 0x2f, 0x64, 0x69, 0x72, 0x2f, 0x55, 0x54, 0x09,
    0x00, 0x03, 0xf2, 0xc7, 0xb8, 0x67, 0x25, 0xc8, 0xb8, 0x67, 0x75, 0x78,
    0x0b, 0x00, 0x01, 0x04, 0x9a, 0x7d, 0x00, 0x00, 0x04, 0x64, 0x00, 0x00,
    0x00, 0x50, 0x4b, 0x03, 0x04, 0x0a, 0x00, 0x00, 0x00, 0x00, 0x00, 0xb3,
    0x54, 0x55, 0x5a, 0xc1, 0x89, 0xec, 0x2f, 0x05, 0x00, 0x00, 0x00, 0x05,
    0x00, 0x00, 0x00, 0x0d, 0x00, 0x1c, 0x00, 0x74, 0x65, 0x73, 0x74, 0x2f,
    0x64, 0x69, 0x72, 0x2f, 0x66, 0x69, 0x6c, 0x65, 0x55, 0x54, 0x09, 0x00,
    0x03, 0xf2, 0xc7, 0xb8, 0x67, 0xf2, 0xc7, 0xb8, 0x67, 0x75, 0x78, 0x0b,
    0x00, 0x01, 0x04, 0x9a, 0x7d, 0x00, 0x00, 0x04, 0x64, 0x00, 0x00, 0x00,
    0x66, 0x69, 0x6c, 0x65, 0x0a, 0x50, 0x4b, 0x01, 0x02, 0x1e, 0x03, 0x0a,
    0x00, 0x00, 0x00, 0x00, 0x00, 0xb1, 0x54, 0x55, 0x5a, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x18,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00, 0xed, 0x41, 0x00,
    0x00, 0x00, 0x00, 0x74, 0x65, 0x73, 0x74, 0x2f, 0x55, 0x54, 0x05, 0x00,
    0x03, 0xed, 0xc7, 0xb8, 0x67, 0x75, 0x78, 0x0b, 0x00, 0x01, 0x04, 0x9a,
    0x7d, 0x00, 0x00, 0x04, 0x64, 0x00, 0x00, 0x00, 0x50, 0x4b, 0x01, 0x02,
    0x1e, 0x03, 0x0a, 0x00, 0x00, 0x00, 0x00, 0x00, 0xa6, 0x54, 0x55, 0x5a,
    0x20, 0x30, 0x3a, 0x36, 0x06, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
    0x0a, 0x00, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0xa4, 0x81, 0x3f, 0x00, 0x00, 0x00, 0x74, 0x65, 0x73, 0x74, 0x2f, 0x68,
    0x65, 0x6c, 0x6c, 0x6f, 0x55, 0x54, 0x05, 0x00, 0x03, 0xd7, 0xc7, 0xb8,
    0x67, 0x75, 0x78, 0x0b, 0x00, 0x01, 0x04, 0x9a, 0x7d, 0x00, 0x00, 0x04,
    0x64, 0x00, 0x00, 0x00, 0x50, 0x4b, 0x01, 0x02, 0x1e, 0x03, 0x0a, 0x00,
    0x00, 0x00, 0x00, 0x00, 0xa9, 0x54, 0x55, 0x5a, 0xa8, 0x61, 0x38, 0xdd,
    0x06, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x18, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xa4, 0x81, 0x89, 0x00,
    0x00, 0x00, 0x74, 0x65, 0x73, 0x74, 0x2f, 0x77, 0x6f, 0x72, 0x6c, 0x64,
    0x55, 0x54, 0x05, 0x00, 0x03, 0xdd, 0xc7, 0xb8, 0x67, 0x75, 0x78, 0x0b,
    0x00, 0x01, 0x04, 0x9a, 0x7d, 0x00, 0x00, 0x04, 0x64, 0x00, 0x00, 0x00,
    0x50, 0x4b, 0x01, 0x02, 0x1e, 0x03, 0x0a, 0x00, 0x00, 0x00, 0x00, 0x00,
    0xae, 0x54, 0x55, 0x5a, 0x18, 0x6b, 0xe6, 0x7d, 0x1b, 0x00, 0x00, 0x00,
    0x1b, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0xa4, 0x81, 0xd3, 0x00, 0x00, 0x00, 0x74, 0x65,
    0x73, 0x74, 0x2f, 0x6c, 0x6f, 0x72, 0x65, 0x6d, 0x55, 0x54, 0x05, 0x00,
    0x03, 0xe7, 0xc7, 0xb8, 0x67, 0x75, 0x78, 0x0b, 0x00, 0x01, 0x04, 0x9a,
    0x7d, 0x00, 0x00, 0x04, 0x64, 0x00, 0x00, 0x00, 0x50, 0x4b, 0x01, 0x02,
    0x1e, 0x03, 0x0a, 0x00, 0x00, 0x00, 0x00, 0x00, 0xb3, 0x54, 0x55, 0x5a,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x09, 0x00, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00,
    0xed, 0x41, 0x32, 0x01, 0x00, 0x00, 0x74, 0x65, 0x73, 0x74, 0x2f, 0x64,
    0x69, 0x72, 0x2f, 0x55, 0x54, 0x05, 0x00, 0x03, 0xf2, 0xc7, 0xb8, 0x67,
    0x75, 0x78, 0x0b, 0x00, 0x01, 0x04, 0x9a, 0x7d, 0x00, 0x00, 0x04, 0x64,
    0x00, 0x00, 0x00, 0x50, 0x4b, 0x01, 0x02, 0x1e, 0x03, 0x0a, 0x00, 0x00,
    0x00, 0x00, 0x00, 0xb3, 0x54, 0x55, 0x5a, 0xc1, 0x89, 0xec, 0x2f, 0x05,
    0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x18, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xa4, 0x81, 0x75, 0x01, 0x00,
    0x00, 0x74, 0x65, 0x73, 0x74, 0x2f, 0x64, 0x69, 0x72, 0x2f, 0x66, 0x69,
    0x6c, 0x65, 0x55, 0x54, 0x05, 0x00, 0x03, 0xf2, 0xc7, 0xb8, 0x67, 0x75,
    0x78, 0x0b, 0x00, 0x01, 0x04, 0x9a, 0x7d, 0x00, 0x00, 0x04, 0x64, 0x00,
    0x00, 0x00, 0x50, 0x4b, 0x05, 0x06, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00,
    0x06, 0x00, 0xdd, 0x01, 0x00, 0x00, 0xc1, 0x01, 0x00, 0x00, 0x00, 0x00
};

// Single-entry archive exercising the zip64 code path.
//
// The one local file header (name "-", method 8, general-purpose flag bit 3
// set) stores 0xffffffff in both 32-bit size fields and carries an extra field
// with header id 0x0001 and 16 bytes of payload. The 8 bytes of compressed data
// are followed by a data descriptor (50 4b 07 08) with 8-byte sizes, then one
// central directory record at offset 0x53 and the end-of-central-directory
// record in the final 22 bytes.
//
// The fixture is read-only, hence constexpr.
constexpr std::array<uint8_t, 152> kTestZip64 = {
    0x50, 0x4b, 0x03, 0x04, 0x2d, 0x00, 0x08, 0x00, 0x08, 0x00, 0xd0, 0x70,
    0x63, 0x5a, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0x01, 0x00, 0x14, 0x00, 0x2d, 0x01, 0x00, 0x10, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x4b, 0xcb, 0xcc, 0x49, 0x35, 0xe0, 0x02, 0x00, 0x50,
    0x4b, 0x07, 0x08, 0x45, 0xc6, 0x32, 0xfb, 0x08, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x50,
    0x4b, 0x01, 0x02, 0x1e, 0x03, 0x2d, 0x00, 0x08, 0x00, 0x08, 0x00, 0xd0,
    0x70, 0x63, 0x5a, 0x45, 0xc6, 0x32, 0xfb, 0x08, 0x00, 0x00, 0x00, 0x06,
    0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01,
    0x00, 0x00, 0x00, 0x80, 0x11, 0x00, 0x00, 0x00, 0x00, 0x2d, 0x50, 0x4b,
    0x05, 0x06, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x01, 0x00, 0x2f, 0x00,
    0x00, 0x00, 0x53, 0x00, 0x00, 0x00, 0x00, 0x00
};

// Smallest possible archive: just the 22-byte end-of-central-directory record
// (50 4b 05 06) with every count, size and offset field set to zero.
//
// The fixture is read-only, hence constexpr.
constexpr std::array<uint8_t, 22> kTestEmptyZip = {
    0x50, 0x4b, 0x05, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};

/**
 * Initializes @p lexer over @p data, lexes it in a single call and checks the
 * structure of the resulting token stream:
 *   - the reported file count is zero iff @p empty is set;
 *   - tokens are contiguous, start at the first byte of @p data and end
 *     exactly at its last byte;
 *   - every CompressedData token directly follows a LocalFileHeader token;
 *   - the number of LocalFileHeader tokens equals the reported file count;
 *   - a CentralDirectory and an EndOfCentralDirectoryRecord token are present.
 *
 * The ASSERT_* macros only return from this helper on failure. Callers that
 * must stop on failure should wrap the call in ASSERT_NO_FATAL_FAILURE.
 *
 * @param lexer Lexer to (re)initialize; left initialized over @p data.
 * @param data  Bytes to lex; must outlive any later use of @p lexer.
 * @param empty Whether the archive is expected to contain zero files.
 */
void testZipLexer(
        ZS2_ZipLexer* lexer,
        std::string_view data,
        bool empty = false)
{
    // Capacity of the token buffer handed to the lexer.
    constexpr size_t kMaxTokens = 32;

    ZL_REQUIRE_SUCCESS(ZS2_ZipLexer_init(lexer, data.data(), data.size()));
    const size_t numFiles = ZS2_ZipLexer_numFiles(lexer);
    if (empty) {
        ASSERT_EQ(numFiles, 0u);
    } else {
        ASSERT_NE(numFiles, 0u);
    }

    ZS2_ZipToken tokens[kMaxTokens];
    const auto report = ZS2_ZipLexer_lex(lexer, tokens, kMaxTokens);
    ZL_REQUIRE_SUCCESS(report);
    const auto numTokens = ZL_validResult(report);
    // Strictly below capacity: presumably a completely full buffer could mean
    // the lexer stopped early rather than reaching the end of the input.
    ASSERT_LT(numTokens, kMaxTokens);

    size_t numFilesFound          = 0;
    bool hasCentralDirectory      = false;
    bool hasEndOfCentralDirectory = false;
    // Position where the next token is expected to start.
    auto ptr = data.data();
    for (size_t i = 0; i < numTokens; ++i) {
        if (tokens[i].type == ZS2_ZipTokenType_CentralDirectory) {
            hasCentralDirectory = true;
        }
        if (tokens[i].type == ZS2_ZipTokenType_EndOfCentralDirectoryRecord) {
            hasEndOfCentralDirectory = true;
        }
        if (tokens[i].type == ZS2_ZipTokenType_LocalFileHeader) {
            ++numFilesFound;
        }
        if (tokens[i].type == ZS2_ZipTokenType_CompressedData) {
            ASSERT_NE(i, 0u);
            ASSERT_EQ(tokens[i - 1].type, ZS2_ZipTokenType_LocalFileHeader);
        }
        // Compare as offsets so a failure prints positions, not raw pointers.
        ASSERT_EQ(tokens[i].ptr - data.data(), ptr - data.data());
        // Check the size against the bytes left instead of computing
        // `ptr + size`: forming a pointer beyond one-past-the-end is undefined
        // behaviour, so a bogus size could otherwise slip through.
        const size_t remaining =
                data.size() - static_cast<size_t>(ptr - data.data());
        ASSERT_LE(tokens[i].size, remaining);
        ptr += tokens[i].size;
    }
    ASSERT_TRUE(hasCentralDirectory);
    ASSERT_TRUE(hasEndOfCentralDirectory);
    ASSERT_EQ(numFiles, numFilesFound);
    ASSERT_EQ(ptr, data.data() + data.size());
}
} // namespace

// Lexes the classic fixture with nothing before or after the archive.
TEST(ZipLexerTest, Basic)
{
    ZS2_ZipLexer lexer;
    // Use data() rather than casting begin(): std::array iterators are not
    // guaranteed to be raw pointers.
    const std::string_view zip(
            reinterpret_cast<const char*>(kTestZip.data()), kTestZip.size());
    ASSERT_NO_FATAL_FAILURE(testZipLexer(&lexer, zip));
}

// Prepends runs of 'x' of various lengths to the archive and checks that the
// lexer still tokenizes the whole buffer and reports the archive start just
// past the garbage.
TEST(ZipLexerTest, GarbageAtBeginning)
{
    // Half-open [begin, end) sweeps of garbage lengths: every length below
    // 1000, then coarser steps up to (but excluding) one million bytes.
    struct Sweep {
        size_t begin;
        size_t end;
        size_t step;
    };
    const Sweep sweeps[] = {
        { 1, 1000, 1 },
        { 10000, 100000, 10000 },
        { 100000, 1000000, 100000 },
    };

    ZS2_ZipLexer lexer;
    for (const Sweep& sweep : sweeps) {
        for (size_t garbage = sweep.begin; garbage < sweep.end;
             garbage += sweep.step) {
            SCOPED_TRACE(::testing::Message() << "garbage=" << garbage);
            std::string data(garbage, 'x');
            data.append(kTestZip.begin(), kTestZip.end());
            // Stop here if the helper failed; otherwise the check below would
            // inspect a lexer whose lex did not succeed.
            ASSERT_NO_FATAL_FAILURE(testZipLexer(&lexer, data));
            ASSERT_EQ(lexer.zipBegin, data.data() + garbage);
        }
    }
}

// Appends runs of 'x' of various lengths to the archive and checks that the
// lexer still tokenizes the whole buffer and reports the archive start at the
// first byte.
TEST(ZipLexerTest, GarbageAtEnd)
{
    // Half-open [begin, end) sweeps of garbage lengths: every length below
    // 1000, then coarser steps up to (but excluding) one million bytes.
    struct Sweep {
        size_t begin;
        size_t end;
        size_t step;
    };
    const Sweep sweeps[] = {
        { 1, 1000, 1 },
        { 10000, 100000, 10000 },
        { 100000, 1000000, 100000 },
    };

    ZS2_ZipLexer lexer;
    for (const Sweep& sweep : sweeps) {
        for (size_t garbage = sweep.begin; garbage < sweep.end;
             garbage += sweep.step) {
            SCOPED_TRACE(::testing::Message() << "garbage=" << garbage);
            // Use data() rather than casting begin(): std::array iterators
            // are not guaranteed to be raw pointers.
            std::string data(
                    reinterpret_cast<const char*>(kTestZip.data()),
                    kTestZip.size());
            data.append(garbage, 'x');
            // Stop here if the helper failed; otherwise the check below would
            // inspect a lexer whose lex did not succeed.
            ASSERT_NO_FATAL_FAILURE(testZipLexer(&lexer, data));
            ASSERT_EQ(lexer.zipBegin, data.data());
        }
    }
}

// Surrounds the archive with equally long runs of 'x' on both sides and checks
// that the lexer still tokenizes the whole buffer and reports the archive
// start just past the leading garbage.
TEST(ZipLexerTest, GarbageAtBeginningAndEnd)
{
    // Half-open [begin, end) sweeps of garbage lengths: every length below
    // 1000, then coarser steps up to (but excluding) one million bytes.
    struct Sweep {
        size_t begin;
        size_t end;
        size_t step;
    };
    const Sweep sweeps[] = {
        { 1, 1000, 1 },
        { 10000, 100000, 10000 },
        { 100000, 1000000, 100000 },
    };

    ZS2_ZipLexer lexer;
    for (const Sweep& sweep : sweeps) {
        for (size_t garbage = sweep.begin; garbage < sweep.end;
             garbage += sweep.step) {
            SCOPED_TRACE(::testing::Message() << "garbage=" << garbage);
            std::string data(garbage, 'x');
            data.append(kTestZip.begin(), kTestZip.end());
            data.append(garbage, 'x');
            // Stop here if the helper failed; otherwise the check below would
            // inspect a lexer whose lex did not succeed.
            ASSERT_NO_FATAL_FAILURE(testZipLexer(&lexer, data));
            ASSERT_EQ(lexer.zipBegin, data.data() + garbage);
        }
    }
}

// Lexes the zip64 fixture with nothing before or after the archive.
TEST(ZipLexerTest, Zip64)
{
    ZS2_ZipLexer lexer;
    // Use data() rather than casting begin(): std::array iterators are not
    // guaranteed to be raw pointers.
    const std::string_view zip(
            reinterpret_cast<const char*>(kTestZip64.data()),
            kTestZip64.size());
    ASSERT_NO_FATAL_FAILURE(testZipLexer(&lexer, zip));
}

// Lexes an archive with zero files, on its own and with 100 bytes of garbage
// before it, around it and after it. Each scenario stops the test on failure
// so later ones do not run against a lexer in an unknown state.
TEST(ZipLexerTest, EmptyZip)
{
    ZS2_ZipLexer lexer;
    // Use data() rather than casting begin(): std::array iterators are not
    // guaranteed to be raw pointers.
    const std::string_view emptyZip(
            reinterpret_cast<const char*>(kTestEmptyZip.data()),
            kTestEmptyZip.size());
    ASSERT_NO_FATAL_FAILURE(
            testZipLexer(&lexer, emptyZip, /* empty */ true));

    // garbage before
    std::string data(100, 'x');
    data.append(kTestEmptyZip.begin(), kTestEmptyZip.end());
    ASSERT_NO_FATAL_FAILURE(testZipLexer(&lexer, data, /* empty */ true));

    // garbage before & after
    data.append(100, 'x');
    ASSERT_NO_FATAL_FAILURE(testZipLexer(&lexer, data, /* empty */ true));

    // garbage after
    data.assign(kTestEmptyZip.begin(), kTestEmptyZip.end());
    data.append(100, 'x');
    ASSERT_NO_FATAL_FAILURE(testZipLexer(&lexer, data, /* empty */ true));
}
